/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.ko;

import java.io.IOException;
import java.util.EnumSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;

/**
 * Tests for {@link KoreanAnalyzer}: token terms, offsets and position increments for fixed
 * sentences, plus randomized robustness checks.
 *
 * <p>Every analyzer is created in a try-with-resources statement so that it is closed even when an
 * assertion fails part-way through a test.
 */
public class TestKoreanAnalyzer extends BaseTokenStreamTestCase {

  /**
   * Analyzes a simple sentence with the default analyzer configuration and checks the emitted
   * terms, start offsets, end offsets and position increments.
   */
  public void testSentence() throws IOException {
    try (Analyzer a = new KoreanAnalyzer()) {
      assertAnalyzesTo(
          a,
          "한국은 대단한 나라입니다.",
          new String[] {"한국", "대단", "나라", "이"},
          new int[] {0, 4, 8, 10},
          new int[] {2, 6, 10, 13},
          new int[] {1, 2, 3, 1});
    }
  }

  /**
   * Analyzes the same sentence with a custom stop-tag set ({@code NNP}, {@code NNG}) instead of the
   * default one. The expected output no longer contains "한국" and "나라", while tokens that the
   * default configuration drops (see {@link #testSentence()}) are kept.
   */
  public void testStopTags() throws IOException {
    Set<POS.Tag> stopTags = EnumSet.of(POS.Tag.NNP, POS.Tag.NNG);
    try (Analyzer a =
        new KoreanAnalyzer(null, KoreanTokenizer.DecompoundMode.DISCARD, stopTags, false)) {
      assertAnalyzesTo(
          a,
          "한국은 대단한 나라입니다.",
          new String[] {"은", "대단", "하", "ᆫ", "이", "ᄇ니다"},
          new int[] {2, 4, 6, 6, 10, 10},
          new int[] {3, 6, 7, 7, 13, 13},
          new int[] {2, 1, 1, 1, 2, 1});
    }
  }

  /**
   * Checks how the run "2018" is tokenized depending on the last constructor argument: with {@code
   * true} it is expected as four single-character tokens, with {@code false} as one token.
   */
  public void testUnknownWord() throws IOException {
    // Last argument true: "2018" is expected as the unigrams "2", "0", "1", "8".
    try (Analyzer a =
        new KoreanAnalyzer(
            null,
            KoreanTokenizer.DecompoundMode.DISCARD,
            KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS,
            true)) {
      assertAnalyzesTo(
          a,
          "2018 평창 동계올림픽대회",
          new String[] {"2", "0", "1", "8", "평창", "동계", "올림픽", "대회"},
          new int[] {0, 1, 2, 3, 5, 8, 10, 13},
          new int[] {1, 2, 3, 4, 7, 10, 13, 15},
          new int[] {1, 1, 1, 1, 1, 1, 1, 1});
    }

    // Last argument false: "2018" is expected as a single token.
    try (Analyzer a =
        new KoreanAnalyzer(
            null,
            KoreanTokenizer.DecompoundMode.DISCARD,
            KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS,
            false)) {
      assertAnalyzesTo(
          a,
          "2018 평창 동계올림픽대회",
          new String[] {"2018", "평창", "동계", "올림픽", "대회"},
          new int[] {0, 5, 8, 10, 13},
          new int[] {4, 7, 10, 13, 15},
          new int[] {1, 1, 1, 1, 1});
    }
  }

  /** blast random strings against the analyzer */
  public void testRandom() throws IOException {
    Random random = random();
    try (Analyzer a = new KoreanAnalyzer()) {
      checkRandomData(random, a, atLeast(200));
    }
  }

  /** blast some random large strings through the analyzer */
  public void testRandomHugeStrings() throws Exception {
    Random random = random();
    try (Analyzer a = new KoreanAnalyzer()) {
      checkRandomData(random, a, RANDOM_MULTIPLIER, 4096);
    }
  }

  /** Nightly-only variant of {@link #testRandomHugeStrings()} with larger arguments. */
  @Nightly
  public void testRandomHugeStringsAtNight() throws Exception {
    Random random = random();
    try (Analyzer a = new KoreanAnalyzer()) {
      checkRandomData(random, a, 3 * RANDOM_MULTIPLIER, 8192);
    }
  }

  // Copied from TestKoreanTokenizer, to make sure passing
  // user dict to analyzer works:
  /**
   * Builds the analyzer with the user dictionary from {@code TestKoreanTokenizer.readDict()} and
   * checks that "c++" is emitted as a single token.
   */
  public void testUserDict() throws IOException {
    try (Analyzer analyzer =
        new KoreanAnalyzer(
            TestKoreanTokenizer.readDict(),
            KoreanTokenizer.DEFAULT_DECOMPOUND,
            KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS,
            false)) {
      assertAnalyzesTo(
          analyzer,
          "c++ 프로그래밍 언어",
          new String[] {"c++", "프로그래밍", "언어"},
          new int[] {0, 4, 10},
          new int[] {3, 9, 12},
          new int[] {1, 1, 1});
    }
  }
}
